suppressPackageStartupMessages(library(tidyverse))
## Warning: package 'ggplot2' was built under R version 4.2.3
## Warning: package 'tidyr' was built under R version 4.2.3
## Warning: package 'readr' was built under R version 4.2.3
## Warning: package 'dplyr' was built under R version 4.2.3
## Warning: package 'stringr' was built under R version 4.2.3
devtools::load_all('~/Google Drive/My Drive/Scripts/R_packages/myUtilities/')
## ℹ Loading myUtilities
Settings
# Location of the raw data for this project.
data_dir <- '/Volumes/Mitsu_NGS_3/METTL2A/'

# Analysis root: it becomes the working directory and is the prefix of every
# path built below (note the trailing slash).
wd <- "/Users/s-mitsutomi/My Drive (shuheimitsutomi@ric.u-tokyo.ac.jp)/Analysis/METTL2A/"
setwd(wd)

# Output folders, later passed as `outdir` to the export/save helpers.
tabledir <- paste0(wd, 'Tables/Shortread/')
figdir <- paste0(wd, 'Figures/Shortread/DETs/')

# Session-wide ggplot2 default: small classic theme with the legend underneath.
theme_set(theme_classic(base_size = 7) + theme(legend.position = 'bottom'))
Functions
# Add a coarse transcript class column, `genetype2`.
#
# Reads the `gene_type`, `gene_name` and `seqname` columns of `df` and labels
# each row with the first rule that matches:
#   protein_coding, not on chrM        -> 'mRNA'
#   protein_coding, on chrM            -> 'mt-mRNA'
#   on chrM, name contains 'MT-RNR'    -> 'mt-rRNA'
#   on chrM, name contains 'MT-T'      -> 'mt-tRNA'
#   gene_type is NA                    -> 'unannotated gene'
#   anything else                      -> 'other ncRNAs'
#
# Returns `df` with the extra character column `genetype2`.
add_genetype2 <- function(df) {
  mutate(
    df,
    genetype2 = case_when(
      # The two protein-coding rules are mutually exclusive, so their
      # relative order does not matter.
      seqname != 'chrM' & gene_type == 'protein_coding' ~ 'mRNA',
      seqname == 'chrM' & gene_type == 'protein_coding' ~ 'mt-mRNA',
      seqname == 'chrM' & grepl('MT-RNR', gene_name) ~ 'mt-rRNA',
      seqname == 'chrM' & grepl('MT-T', gene_name) ~ 'mt-tRNA',
      is.na(gene_type) ~ 'unannotated gene',
      .default = 'other ncRNAs'
    )
  )
}
# Flag differentially expressed transcripts (DETs) for the two conditions
# labelled G and I.
#
# Required columns in `df`: siMETTL2A_G_pvalue, siMETTL2A_I_pvalue,
# siMETTL2A_G_log2FoldChange, siMETTL2A_I_log2FoldChange.
#
# Columns added:
#   isUp        'common' (significant and log2FC > 0 in both conditions),
#               'only G', 'only I', or 'not'
#   isDown      same categories for log2FC < 0
#   common_DETs 'up' / 'down' when the call is shared by both conditions,
#               otherwise 'other'
#
# A missing p-value or fold change never satisfies a rule, so such rows fall
# through to the next rule and finally to 'not'.
# NOTE(review): calls use the raw `pvalue` columns, not `padj` - confirm that
# this is intended.
#
# @param df    Data frame holding the four columns listed above.
# @param alpha Significance cut-off applied to the p-values (default .05).
# @return An ungrouped tibble: `df` plus `isUp`, `isDown` and `common_DETs`.
add_isDET <- function(df, alpha = .05) {
  required_cols <- c(
    'siMETTL2A_G_pvalue', 'siMETTL2A_I_pvalue',
    'siMETTL2A_G_log2FoldChange', 'siMETTL2A_I_log2FoldChange'
  )
  missing_cols <- setdiff(required_cols, names(df))
  if (length(missing_cols) > 0) {
    stop(
      'add_isDET(): missing column(s): ',
      paste(missing_cols, collapse = ', '),
      call. = FALSE
    )
  }

  # One vectorised rule table for both directions. `direction` is +1 for up
  # and -1 for down: flipping the sign of the fold changes turns
  # "both < 0" into "both > 0", so the same comparisons serve both cases.
  # pmax()/pmin() work on whole columns, which removes the need for rowwise().
  classify_direction <- function(p_g, p_i, lfc_g, lfc_i, direction) {
    lfc_g <- direction * lfc_g
    lfc_i <- direction * lfc_i
    case_when(
      pmax(p_g, p_i) < alpha & pmin(lfc_g, lfc_i) > 0 ~ 'common',
      p_g < alpha & lfc_g > 0 ~ 'only G',
      p_i < alpha & lfc_i > 0 ~ 'only I',
      .default = 'not'
    )
  }

  df |>
    # Drop any incoming grouping and return a plain tibble.
    ungroup() |>
    as_tibble() |>
    mutate(
      isUp = classify_direction(
        siMETTL2A_G_pvalue, siMETTL2A_I_pvalue,
        siMETTL2A_G_log2FoldChange, siMETTL2A_I_log2FoldChange,
        direction = 1
      ),
      isDown = classify_direction(
        siMETTL2A_G_pvalue, siMETTL2A_I_pvalue,
        siMETTL2A_G_log2FoldChange, siMETTL2A_I_log2FoldChange,
        direction = -1
      ),
      common_DETs = case_when(
        isUp == 'common' ~ 'up',
        isDown == 'common' ~ 'down',
        .default = 'other'
      )
    )
}
Read data
Read shortread DESeq2 results
# Import the annotated short-read DESeq2 table from the analysis folder and
# put the `gene_*` columns first, followed by all remaining columns.
shortread_DESeq2_results <-
  paste0(wd, 'Tables/shortread_DESeq2_results_annotated_2024-04-05.tsv') |>
  read_tsv() |>
  select(starts_with('gene_'), everything())
## Rows: 13418 Columns: 22
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (4): gene_id, gene_type, gene_name, seqname
## dbl (18): siMETTL2A_baseMean, siMETTL2A_log2FoldChange, siMETTL2A_lfcSE, siM...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Show the imported table (auto-print) as a quick visual check.
shortread_DESeq2_results
## # A tibble: 13,418 × 22
## gene_id gene_type gene_name siMETTL2A_baseMean siMETTL2A_log2FoldCh…¹
## <chr> <chr> <chr> <dbl> <dbl>
## 1 ENSG0000000045… protein_… SCYL3 1499. 0.242
## 2 ENSG0000000046… protein_… C1orf112 1060. -1.08
## 3 ENSG0000000146… protein_… STPG1 1857. -0.286
## 4 ENSG0000000146… protein_… NIPAL3 11719. 0.534
## 5 ENSG0000000445… protein_… AK2 15695. -0.548
## 6 ENSG0000000448… protein_… KDM1A 17975. 0.353
## 7 ENSG0000000655… protein_… TTC22 1441. 0.845
## 8 ENSG0000000734… protein_… ST7L 857. -0.0295
## 9 ENSG0000000792… protein_… DNAJC11 9679. 0.411
## 10 ENSG0000000812… <NA> <NA> 413. 0.231
## # ℹ 13,408 more rows
## # ℹ abbreviated name: ¹​siMETTL2A_log2FoldChange
## # ℹ 17 more variables: siMETTL2A_lfcSE <dbl>, siMETTL2A_stat <dbl>,
## # siMETTL2A_pvalue <dbl>, siMETTL2A_padj <dbl>, siMETTL2A_I_baseMean <dbl>,
## # siMETTL2A_I_log2FoldChange <dbl>, siMETTL2A_I_lfcSE <dbl>,
## # siMETTL2A_I_stat <dbl>, siMETTL2A_I_pvalue <dbl>, siMETTL2A_I_padj <dbl>,
## # siMETTL2A_G_baseMean <dbl>, siMETTL2A_G_log2FoldChange <dbl>, …
Join methylation info with DESeq2 results
# Attach the `methylation` column of `DRS_methylated_genes` (defined outside
# this part of the script) to every DESeq2 row. Rows without a match get '-'.
shortread_DESeq2_results_DRSm3C <-
  shortread_DESeq2_results |>
  left_join(
    DRS_methylated_genes,
    # Explicit key: the join no longer depends on which column names the two
    # tables happen to share.
    by = join_by(gene_id),
    # Each DESeq2 row may match at most one methylation record; a duplicated
    # gene_id in the lookup table now raises an error instead of silently
    # duplicating rows.
    relationship = 'many-to-one'
  ) |>
  replace_na(list(methylation = '-'))
## Joining with `by = join_by(gene_id)`
# Show the joined table (auto-print); it should have the same number of rows
# as the DESeq2 table plus the `methylation` column.
shortread_DESeq2_results_DRSm3C
## # A tibble: 13,418 × 23
## gene_id gene_type gene_name siMETTL2A_baseMean siMETTL2A_log2FoldCh…¹
## <chr> <chr> <chr> <dbl> <dbl>
## 1 ENSG0000000045… protein_… SCYL3 1499. 0.242
## 2 ENSG0000000046… protein_… C1orf112 1060. -1.08
## 3 ENSG0000000146… protein_… STPG1 1857. -0.286
## 4 ENSG0000000146… protein_… NIPAL3 11719. 0.534
## 5 ENSG0000000445… protein_… AK2 15695. -0.548
## 6 ENSG0000000448… protein_… KDM1A 17975. 0.353
## 7 ENSG0000000655… protein_… TTC22 1441. 0.845
## 8 ENSG0000000734… protein_… ST7L 857. -0.0295
## 9 ENSG0000000792… protein_… DNAJC11 9679. 0.411
## 10 ENSG0000000812… <NA> <NA> 413. 0.231
## # ℹ 13,408 more rows
## # ℹ abbreviated name: ¹​siMETTL2A_log2FoldChange
## # ℹ 18 more variables: siMETTL2A_lfcSE <dbl>, siMETTL2A_stat <dbl>,
## # siMETTL2A_pvalue <dbl>, siMETTL2A_padj <dbl>, siMETTL2A_I_baseMean <dbl>,
## # siMETTL2A_I_log2FoldChange <dbl>, siMETTL2A_I_lfcSE <dbl>,
## # siMETTL2A_I_stat <dbl>, siMETTL2A_I_pvalue <dbl>, siMETTL2A_I_padj <dbl>,
## # siMETTL2A_G_baseMean <dbl>, siMETTL2A_G_log2FoldChange <dbl>, …
Add DET information and custom genetype
# Annotate every row with its DET calls (isUp / isDown / common_DETs) and then
# with the coarse transcript class (genetype2).
shortread_DESeq2_results_DRSm3C_DETinfo <- add_genetype2(
  add_isDET(shortread_DESeq2_results_DRSm3C)
)

# Write the annotated table to `tabledir`. The object is passed as a bare
# symbol on purpose: export_tsv() appears to derive the output file name from
# it - TODO confirm against myUtilities.
export_tsv(shortread_DESeq2_results_DRSm3C_DETinfo, outdir = tabledir)
##
## Exported to: /Users/s-mitsutomi/My Drive (shuheimitsutomi@ric.u-tokyo.ac.jp)/Analysis/METTL2A/Tables/Shortread/shortread_DESeq2_results_DRSm3C_DETinfo_2024-04-06.tsv
## # A tibble: 13,418 × 27
## gene_id gene_type gene_name siMETTL2A_baseMean siMETTL2A_log2FoldCh…¹
## <chr> <chr> <chr> <dbl> <dbl>
## 1 ENSG0000000045… protein_… SCYL3 1499. 0.242
## 2 ENSG0000000046… protein_… C1orf112 1060. -1.08
## 3 ENSG0000000146… protein_… STPG1 1857. -0.286
## 4 ENSG0000000146… protein_… NIPAL3 11719. 0.534
## 5 ENSG0000000445… protein_… AK2 15695. -0.548
## 6 ENSG0000000448… protein_… KDM1A 17975. 0.353
## 7 ENSG0000000655… protein_… TTC22 1441. 0.845
## 8 ENSG0000000734… protein_… ST7L 857. -0.0295
## 9 ENSG0000000792… protein_… DNAJC11 9679. 0.411
## 10 ENSG0000000812… <NA> <NA> 413. 0.231
## # ℹ 13,408 more rows
## # ℹ abbreviated name: ¹​siMETTL2A_log2FoldChange
## # ℹ 22 more variables: siMETTL2A_lfcSE <dbl>, siMETTL2A_stat <dbl>,
## # siMETTL2A_pvalue <dbl>, siMETTL2A_padj <dbl>, siMETTL2A_I_baseMean <dbl>,
## # siMETTL2A_I_log2FoldChange <dbl>, siMETTL2A_I_lfcSE <dbl>,
## # siMETTL2A_I_stat <dbl>, siMETTL2A_I_pvalue <dbl>, siMETTL2A_I_padj <dbl>,
## # siMETTL2A_G_baseMean <dbl>, siMETTL2A_G_log2FoldChange <dbl>, …
# Number of transcripts per DET call (up / down / other) within each
# transcript class, plus that number as a percentage of the class total.
shortread_DESeq2_results_DETinfo_groupedby_genetypes <-
  shortread_DESeq2_results_DRSm3C_DETinfo |>
  count(common_DETs, genetype2) |>
  # `.by` groups for this step only, so the result is returned ungrouped.
  mutate(percentage = 100 * n / sum(n), .by = genetype2) |>
  arrange(genetype2)
shortread_DESeq2_results_DETinfo_groupedby_genetypes
## # A tibble: 14 × 4
## # Groups: genetype2 [6]
## common_DETs genetype2 n percentage
## <chr> <chr> <int> <dbl>
## 1 down mRNA 2248 21.6
## 2 other mRNA 6150 59.1
## 3 up mRNA 2007 19.3
## 4 other mt-mRNA 4 30.8
## 5 up mt-mRNA 9 69.2
## 6 up mt-rRNA 2 100
## 7 other mt-tRNA 6 85.7
## 8 up mt-tRNA 1 14.3
## 9 down other ncRNAs 168 5.91
## 10 other other ncRNAs 2411 84.8
## 11 up other ncRNAs 264 9.29
## 12 down unannotated gene 1 0.676
## 13 other unannotated gene 140 94.6
## 14 up unannotated gene 7 4.73
# Number of transcripts per DET call (up / down / other) within each
# methylation status ('+' / '-'), plus that number as a percentage of the
# status total.
shortread_DESeq2_results_DETinfo_groupedby_methylation <-
  shortread_DESeq2_results_DRSm3C_DETinfo |>
  count(common_DETs, methylation) |>
  # `.by` groups for this step only, so the result is returned ungrouped.
  mutate(percentage = 100 * n / sum(n), .by = methylation) |>
  arrange(methylation)
shortread_DESeq2_results_DETinfo_groupedby_methylation
## # A tibble: 6 × 4
## # Groups: methylation [2]
## common_DETs methylation n percentage
## <chr> <chr> <int> <dbl>
## 1 down + 6 7.59
## 2 other + 50 63.3
## 3 up + 23 29.1
## 4 down - 2411 18.1
## 5 other - 8661 64.9
## 6 up - 2267 17.0
# Number of transcripts per DET call (up / down / other) within each
# combination of methylation status and transcript class, plus that number as
# a percentage of the combination total.
shortread_DESeq2_results_DETinfo_groupedby_genetypes_methylation <-
  shortread_DESeq2_results_DRSm3C_DETinfo |>
  count(common_DETs, methylation, genetype2) |>
  # `.by` groups for this step only, so the result is returned ungrouped.
  mutate(
    percentage = 100 * n / sum(n),
    .by = c(methylation, genetype2)
  ) |>
  arrange(methylation, genetype2)
shortread_DESeq2_results_DETinfo_groupedby_genetypes_methylation
## # A tibble: 19 × 5
## # Groups: methylation, genetype2 [8]
## common_DETs methylation genetype2 n percentage
## <chr> <chr> <chr> <int> <dbl>
## 1 down + mRNA 6 8.82
## 2 other + mRNA 48 70.6
## 3 up + mRNA 14 20.6
## 4 other + mt-mRNA 2 22.2
## 5 up + mt-mRNA 7 77.8
## 6 up + mt-rRNA 2 100
## 7 down - mRNA 2242 21.7
## 8 other - mRNA 6102 59.0
## 9 up - mRNA 1993 19.3
## 10 other - mt-mRNA 2 50
## 11 up - mt-mRNA 2 50
## 12 other - mt-tRNA 6 85.7
## 13 up - mt-tRNA 1 14.3
## 14 down - other ncRNAs 168 5.91
## 15 other - other ncRNAs 2411 84.8
## 16 up - other ncRNAs 264 9.29
## 17 down - unannotated gene 1 0.676
## 18 other - unannotated gene 140 94.6
## 19 up - unannotated gene 7 4.73
Plot
# Horizontal bar chart, filled to 100 %, of the DET calls within each
# transcript class.
shortread_DESeq2_results_DETinfo_groupedby_genetypes_barplot <-
  shortread_DESeq2_results_DETinfo_groupedby_genetypes |>
  ggplot(aes(x = fct_rev(genetype2), y = n, fill = common_DETs)) +
  # Counts are pre-computed, so geom_col() draws them as given;
  # position_fill() rescales every bar to the same total.
  geom_col(position = position_fill()) +
  scale_y_reverse() +
  # Named values tie each colour to its category. Unnamed values are assigned
  # by position and would shift if a category were absent from the data.
  scale_fill_manual(
    values = c(down = '#3e3ef2', other = 'grey', up = '#f23e3e')
  ) +
  coord_flip()
# Save to `figdir`; the plot object is passed as a bare symbol because the
# helper appears to use its name for the output files - TODO confirm.
shortread_DESeq2_results_DETinfo_groupedby_genetypes_barplot |>
  ggsave_multiple_formats(
    width = 5, height = 5, fontsize = 7, outdir = figdir
  )

# Horizontal bar chart, filled to 100 %, of the DET calls within each
# methylation status.
shortread_DESeq2_results_DETinfo_groupedby_methylation_barplot <-
  shortread_DESeq2_results_DETinfo_groupedby_methylation |>
  ggplot(aes(x = methylation, y = n, fill = common_DETs)) +
  # Counts are pre-computed, so geom_col() draws them as given;
  # position_fill() rescales every bar to the same total.
  geom_col(position = position_fill()) +
  scale_y_reverse() +
  # Named values tie each colour to its category. Unnamed values are assigned
  # by position and would shift if a category were absent from the data.
  scale_fill_manual(
    values = c(down = '#3e3ef2', other = 'grey', up = '#f23e3e')
  ) +
  coord_flip()
# Save to `figdir`; the plot object is passed as a bare symbol because the
# helper appears to use its name for the output files - TODO confirm.
shortread_DESeq2_results_DETinfo_groupedby_methylation_barplot |>
  ggsave_multiple_formats(
    width = 3.5, height = 2.5, fontsize = 7, outdir = figdir
  )

# Horizontal bar chart, filled to 100 %, of the DET calls within each
# combination of methylation status and transcript class.
shortread_DESeq2_results_DETinfo_groupedby_genetypes_methylation_barplot <-
  shortread_DESeq2_results_DETinfo_groupedby_genetypes_methylation |>
  ggplot(
    aes(
      # interaction() joins the two labels with '.', the delimiter the
      # nested axis guide below is told to split on.
      x = interaction(methylation, genetype2),
      y = n,
      fill = common_DETs
    )
  ) +
  # Counts are pre-computed, so geom_col() draws them as given;
  # position_fill() rescales every bar to the same total.
  geom_col(position = position_fill()) +
  scale_y_reverse() +
  scale_x_discrete(guide = ggh4x::guide_axis_nested(delim = '.')) +
  # Named values tie each colour to its category. Unnamed values are assigned
  # by position and would shift if a category were absent from the data.
  scale_fill_manual(
    values = c(down = '#3e3ef2', other = 'grey', up = '#f23e3e')
  ) +
  coord_flip()
# Save to `figdir`; the plot object is passed as a bare symbol because the
# helper appears to use its name for the output files - TODO confirm.
shortread_DESeq2_results_DETinfo_groupedby_genetypes_methylation_barplot |>
  ggsave_multiple_formats(
    width = 6, height = 6, fontsize = 7, outdir = figdir
  )
## Warning: The S3 guide system was deprecated in ggplot2 3.5.0.
## ℹ It has been replaced by a ggproto system that can be extended.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
